rm(list=ls())
library(foreign); library(survey); library(descr);library(car);library(AER); library(plyr)
library(apsrtable)
library(xtable)
library(ggplot2)
library(shape)
library(MBTAr)
library(chron)
library(stringr)
library(MASS) # for ordered logit
library(rdrobust)

source("rd.export.R")


######################
##### Load data: #####
######################
# Survey 1: March 2015:
dat <- read.csv("summer.csv",stringsAsFactors = F)
# Survey 2: July/August/September 2015
march <- read.csv("march.csv",stringsAsFactors = F)

combo <- rbind(march[,c("mayor_app_comb","runningvar_comb_min","treat_2","delay_comb_log","delay_comb","party","polinfo","educ_r","inc_r","age","race_black","male","commutetrip","voted","delay_source","delay_line")],
               dat[,c("mayor_app_comb","runningvar_comb_min","treat_2","delay_comb_log","delay_comb","party","polinfo","educ_r","inc_r","age","race_black","male","commutetrip","voted","delay_source","delay_line")])


########
### Analyses:
########

## Main results

## March data:
# effect on most recent trip rating:
fit_march_logdelay_trip <- (lm(triprating_1 ~ delay_comb_log,data=march));summary(fit_march_logdelay_trip)
fit_march_logdelay_trip_c <- (lm(triprating_1 ~ delay_comb_log + delay_line + polinfo + (party==-1) + (party==1),data=march));summary(fit_march_logdelay_trip_c)

# effect on local govt approval:
fit_march_logdelay_mayor <- (lm(mayor_app_comb ~  delay_comb_log, data=march)); summary(fit_march_logdelay_mayor)
fit_march_logdelay_mayor_c <- (lm(mayor_app_comb ~ delay_comb_log  + delay_line + polinfo + (party==-1) + (party==1), data=march)); summary(fit_march_logdelay_mayor_c) 

## Table 1:
apsrtable(fit_march_logdelay_trip,fit_march_logdelay_trip_c,fit_march_logdelay_mayor,fit_march_logdelay_mayor_c,
          omitcoef=c("delay_lineFitchburg Line","delay_lineFramingham/Worcester Line","delay_lineFranklin Line","delay_lineGreenbush Line","delay_lineHaverhill Line","delay_lineKingston/Plymouth Line","delay_lineLowell Line","delay_lineMiddleborough/Lakeville Line","delay_lineNeedham Line","delay_lineNewburyport/Rockport Line","delay_lineOrange Line","delay_lineProvidence/Stoughton Line","delay_lineRed Line" ,"polinfo","party == -1TRUE","party == 1TRUE"),
          coef.names = c("Intercept","log(Delay time)"),
          digits=3,Sweave=T,stars="default")
##


## Summer data:
fit_exp_logdelay <- lm(mayor_app_comb ~  delay_comb_log * treat_2, data=dat); summary(fit_exp_logdelay)
fit_exp_logdelay_c <- lm(mayor_app_comb ~  delay_comb_log * treat_2 + delay_line + polinfo + (party==-1) + (party==1), data=dat); summary(fit_exp_logdelay_c)

## Table D-4: Main Experimental Results
takeout <- c("delay_lineFairmount Line","delay_lineFitchburg Line","delay_lineFramingham/Worcester Line","delay_lineFranklin Line","delay_lineGreen Line","delay_lineGreenbush Line","delay_lineHaverhill Line","delay_lineKingston/Plymouth Line","delay_lineLowell Line","delay_lineMiddleborough/Lakeville Line","delay_lineNeedham Line","delay_lineNewburyport/Rockport Line","delay_lineOrange Line","delay_lineProvidence/Stoughton Line","delay_lineRed Line","polinfo","party == -1TRUE","party == 1TRUE")

apsrtable(fit_exp_logdelay,fit_exp_logdelay_c,digits=3,stars="default",Sweave=T,omitcoef = takeout,
          # order=c("delay_comb_log","treat_politics","delay_comb_log:treat_politics","(Intercept)"),
          coef.names=c("(Intercept)",
                       "log(Delay time)",
                       "Treatment",
                       "log(Delay time) $\\times$ Treatment"
          ))
####

## Main experimental result (Figure 1):
set.seed=02139
dat$delay_comb_log_jittered <- jitter(dat$delay_comb_log,amount = 0.08)
set.seed=02139
dat$mayor_app_comb_jittered <- jitter(dat$mayor_app_comb)


pdf("figures/delaylog_mayorapp_treat.pdf",width=7,height=4.75)
par(mar=c(3.5,5,0,0))
plot(dat$delay_comb_log_jittered,dat$mayor_app_comb_jittered,xlab="", ylab="Local Government Approval",,yaxt="n",frame.plot = F,  pch=ifelse(dat$treat_2==1,16,2), col=ifelse(dat$treat_2==1,"cornflowerblue","black"))
abline(lm(mayor_app_comb ~ delay_comb_log,data=dat[dat$treat_2==1,]),col="cornflowerblue",lwd=2,lty=1)
abline(lm(mayor_app_comb ~ delay_comb_log,data=dat[dat$treat_2==0,]),col="black",lwd=2,lty=2)
axis(2,at = c(seq(0,1,by=1/4)),tick = T,labels=c("Strongly\nDisapprove","","","","Strongly\nApprove"),cex.axis=0.7,las=1)
text(labels="Log(Delay)",x=2,y=-0.25,cex=1,xpd=T)
text(labels="Treatment",x=3.48,y=0.14,col="cornflowerblue",cex=1)
Arrows(x0=3.404841,y0=0.1059483,x1=2.88,y1=0.03,code=2,col="cornflowerblue",lwd=2,arr.length=0.25,arr.adj=1)
Arrows(x0=3.378768,y0=0.1608467,x1=3.037509,y1=0.4483873,code=2,col="cornflowerblue",lwd=2,arr.length=0.25,arr.adj=1)
text(labels="Control",x=3.75,y=0.91,col="black",cex=1)
Arrows(x0=3.822009,y0=0.8745264,x1=3.97,y1=0.78,code=2,col="black",lty=1,lwd=2,arr.length=0.25,arr.adj=1)
Arrows(x0=3.756827,y0=0.8745264,x1=3.603915,y1=0.6942852,code=2,col="black",lty=1,lwd=2,arr.length=0.25,arr.adj=1)
# locator(1) # to find points for end of arrows
dev.off()




#################
##### Demographic Balance
#################
# matched sample vs. matchable vs. full sample
dat_full <- dat[which(dat$optin==1 & !is.na(dat$treat_2)),]
matchable <- dat_full[which(dat_full$start_mode == "Commuter Rail" | (dat_full$start_mode == "Subway" & dat_full$start_line %in% c("Red Line","Orange Line","Blue Line"))),]
matched <- matchable[!is.na(matchable$delay_comb),]

d_matched <- c(median(matched$inc_r,na.rm=T),sd(matched$inc_r,na.rm=T),median(matched$age,na.rm=T),sd(matched$age,na.rm=T),mean(matched$race_black,na.rm=T),sd(matched$race_black,na.rm=T),mean(matched$male,na.rm=T),sd(matched$male,na.rm=T),mean(matched$commutetrip,na.rm=T),sd(matched$commutetrip,na.rm=T),mean((matched$ideo==1),na.rm=T),sd((matched$ideo==1),na.rm=T),mean(matched$cr_rider,na.rm=T),sd(matched$cr_rider,na.rm=T),nrow(matched))

d_matchable <- c(median(matchable$inc_r,na.rm=T),sd(matchable$inc_r,na.rm=T),median(matchable$age,na.rm=T),sd(matchable$age,na.rm=T),mean(matchable$race_black,na.rm=T),sd(matchable$race_black,na.rm=T),mean(matchable$male,na.rm=T),sd(matchable$male,na.rm=T),mean(matchable$commutetrip,na.rm=T),sd(matchable$commutetrip,na.rm=T),mean((matchable$ideo==1),na.rm=T),sd((matchable$ideo==1),na.rm=T),mean(matchable$cr_rider,na.rm=T),sd(matchable$cr_rider,na.rm=T),nrow(matchable))

d_full <- c(median(dat_full$inc_r,na.rm=T),sd(dat_full$inc_r,na.rm=T),median(dat_full$age,na.rm=T),sd(dat_full$age,na.rm=T),mean(dat_full$race_black,na.rm=T),sd(dat_full$race_black,na.rm=T),mean(dat_full$male,na.rm=T),sd(dat_full$male,na.rm=T),mean(dat_full$commutetrip,na.rm=T),sd(dat_full$commutetrip,na.rm=T),mean((dat_full$ideo==1),na.rm=T),sd((dat_full$ideo==1),na.rm=T),mean(dat_full$cr_rider,na.rm=T),sd(dat_full$cr_rider,na.rm=T),nrow(dat_full))


names <- c("Median income category","","Median age category","","% Black","" ,"% Male","" , "% Commuter (>4 times/week)","" ,"% Extremely Liberal","","% Commuter Rail rider","","Number of respondents")


## Table A-1:
print(xtable(data.frame(rownames=names,
                        matched=d_matched,
                        matchable=d_matchable,
                        full=d_full
),
digits=2),
include.rownames=F)
####


### Sorting check, using combined data:
# 1-minute binwidths:
width <- 1
combo <- mutate(combo, bin=cut(runningvar_comb_min, breaks=seq(-20,20, width)))
bin2.df <- data.frame(bin.mean=tapply(combo$runningvar_comb_min,
                                      combo$bin, mean, na.rm=TRUE),
                      mid=seq(-20 + width/2, 20 - width/2, width),
                      n=tapply(combo$runningvar_comb_min, combo$bin,
                               length))
bin2.df$n[is.na(bin2.df$n)] <- 0
# McCrary assessment:
mc.rd <- lm(n ~ mid * (mid>=0), data=bin2.df[which(bin2.df$mid>=(-10) & bin2.df$mid<=(10)),])
summary(mc.rd) # beta = -19.18, SE=15.4, p=0.231

apsrtable(mc.rd,digits = 2,
          omitcoef = "(Intercept)",
          coef.names = c("Bin midpoint","Midpoint $>=$ 0","Bin midpoint $\\times$ Midpoint $>=$ 0"))

# plotting:
rdplot_mccrary1_combo <- (ggplot(combo)
                          + geom_point(aes(x = runningvar_comb_min, y = -2),size=2, shape="|", alpha=.5)
                          + geom_smooth(data=subset(bin2.df, bin.mean > 0),
                                        aes(x = bin.mean, y = n),
                                        method = 'lm', formula = y ~ poly(x, 1), size=1.5)
                          + geom_smooth(data=subset(bin2.df, bin.mean < 0),
                                        aes(x = bin.mean, y = n),
                                        method = 'lm', formula = y ~ poly(x, 1), size=1.5)
                          + geom_point(data=bin2.df, aes(x=mid, y=n),size=3, pch=1)
                          + geom_vline(xintercept=0,lty="dotted")
                          + geom_hline(yintercept=0, lty='dotted')
                          + coord_cartesian(ylim = c(-0.05,100))
                          + scale_x_continuous(breaks=seq(-10,10,2),
                                               limits=c(-10,10))
                          + theme_bw()
                          + labs(x = "Minutes passenger arrived after closest train",
                                 y = "# Observations in 1-minute bin")
                          + theme(axis.text = element_text(size=15),axis.title = element_text(size=15))
)
ggsave(plot = rdplot_mccrary1_combo,path = "figures",filename="rdplot_mccrary1_combo.pdf",width=12,height=6)

# 2-minute binwidths:
width <- 2
combo <- mutate(combo, bin=cut(runningvar_comb_min, breaks=seq(-20,20, width)))
bin2.df <- data.frame(bin.mean=tapply(combo$runningvar_comb_min,
                                      combo$bin, mean, na.rm=TRUE),
                      mid=seq(-20 + width/2, 20 - width/2, width),
                      n=tapply(combo$runningvar_comb_min, combo$bin,
                               length))
bin2.df$n[is.na(bin2.df$n)] <- 0
# formal McCrary test:
mc.rd <- lm(n ~ mid * (mid>=0), data=bin2.df[which(bin2.df$mid>=(-10) & bin2.df$mid<=(10)),])
summary(mc.rd) # beta = -38.75, p=0.44

# plotting:
rdplot_mccrary2_combo <- (ggplot(combo)
                          + geom_point(aes(x = runningvar_comb_min, y = -2),size=2, shape="|", alpha=.5)
                          + geom_smooth(data=subset(bin2.df, bin.mean > 0),
                                        aes(x = bin.mean, y = n),
                                        method = 'lm', formula = y ~ poly(x, 1), size=1.5)
                          + geom_smooth(data=subset(bin2.df, bin.mean < 0),
                                        aes(x = bin.mean, y = n),
                                        method = 'lm', formula = y ~ poly(x, 1), size=1.5)
                          + geom_point(data=bin2.df, aes(x=mid, y=n),size=3, pch=1)
                          + geom_vline(xintercept=0,lty="dotted")
                          + geom_hline(yintercept=0, lty='dotted')
                          + coord_cartesian(ylim = c(-0.05,185))
                          + scale_x_continuous(breaks=seq(-10,10,2),
                                               limits=c(-10,10))
                          + theme_bw()
                          + labs(x = "Minutes passenger arrived after closest train",
                                 y = "# Observations in 2-minute bin")
                          + theme(axis.text = element_text(size=15),axis.title = element_text(size=15))
)
ggsave(plot = rdplot_mccrary2_combo,path = "figures",filename="rdplot_mccrary2_combo.pdf",width=12,height=6)




## Balance of delays:
f8 <- (lm(delay_comb ~ factor(party,levels = c(0,-1,1)) + polinfo + educ_r + inc_r + age + race_black + male + commutetrip + voted + delay_source, data=combo)) # sig pos Rep effect, marginal positive age effect
f8log <- (lm(delay_comb_log ~ factor(party,levels = c(0,-1,1)) + polinfo + educ_r + inc_r + age + race_black + male + commutetrip + voted + delay_source, data=combo)) # sig pos age effect



### Table B-3:
apsrtable(f8,f8log,digits=3,stars="default",
          model.names = c("Delay time","Log delay time"),coef.names = c("(Intercept)",
                                                                        "Democrat",
                                                                        "Republican",
                                                                        "Political information",
                                                                        "Education",
                                                                        "Income",
                                                                        "Age",
                                                                        "Race $=$ black",
                                                                        "Male",
                                                                        "Commuter ($>4$ times/week)",
                                                                        "Voted in 2014",
                                                                        "CR vs. Subway"
          ))
###



## Alternate specifications for main results (Appendix D.1):

# different operationalizations of delay:
# non-logged:
f1b <- (lm(mayor_app_comb ~  delay_comb * treat_2, data=dat));summary(f1b) # marginal neg intx (-0.011)

# binary version
f1c <- (lm(mayor_app_comb ~  (delay_comb>0) * treat_2, data=dat));summary(f1c) # sig neg intx (-0.144)

# binned version:
f1d <- (lm(mayor_app_comb ~  (delay_comb_bin2) * treat_2, data=dat));summary(f1d) # sig neg intx (-0.07)

## Table D-5: Alternate Specifications:
apsrtable(f1b,f1c,f1d,digits=3,stars="default",Sweave=T,se="robust",
          coef.names = c("(Intercept)",
                         "Delay time (raw)",
                         "Treatment",
                         "Delay time (raw) $\\times$ Treatment",
                         "Delay time $>0$",
                         "Delay time $>0$ $\\times$ Treatment",
                         "Delay time (4 bins)",
                         "Delay time (4 bins) $\\times$ Treatment"))
####

## subgroups:
fit_exp_logdelay_CRonly <- lm(mayor_app_comb ~  delay_comb_log * treat_2, data=dat[dat$delay_source=="CR",])
summary(fit_exp_logdelay_CRonly)
fit_exp_logdelay_SUBonly <- lm(mayor_app_comb ~  delay_comb_log * treat_2, data=dat[dat$delay_source=="Subway",])
summary(fit_exp_logdelay_SUBonly)
fit_exp_logdelay_CRonly_c <- lm(mayor_app_comb ~  delay_comb_log * treat_2 + delay_line + polinfo + (party==-1) + (party==1), data=dat[dat$delay_source=="CR",])
summary(fit_exp_logdelay_CRonly_c)
fit_exp_logdelay_SUBonly_c <- lm(mayor_app_comb ~  delay_comb_log * treat_2 + delay_line + polinfo + (party==-1) + (party==1), data=dat[dat$delay_source=="Subway",])
summary(fit_exp_logdelay_SUBonly_c)

## Table D-6:
apsrtable(fit_exp_logdelay_CRonly,fit_exp_logdelay_SUBonly,fit_exp_logdelay_CRonly_c,fit_exp_logdelay_SUBonly_c,digits=3,stars="default",Sweave=T,omitcoef = takeout,
          # order=c("delay_comb_log","treat_2","delay_comb_log:treat_2","(Intercept)"),
          coef.names=c("(Intercept)",
                       "log(Delay time)",
                       "Treatment",
                       "log(Delay time) $\\times$ Treatment"
          ))
####


# block bootstrap to account for grouped treatment:
sims <- 1000
ATEs <- NULL
intx <- NULL
ATEs_cont <- NULL
intx_cont <- NULL
bootclusters <- NULL
bootdata <- NULL
dat.usable <- dat[which(!is.na(dat$mayor_app_comb) & !is.na(dat$delay_comb_log) & !is.na(dat$treat_2)),]
for(i in 1:sims){
  bootclusters <- sample(unique(dat.usable$delay_line),length(unique(dat.usable$delay_line)),replace=T)
  samplerows <- NULL
  rows <- NULL
  for(j in 1:length(bootclusters)){
    rows <- which(dat.usable$delay_line == bootclusters[j])
    samplerows <- c(samplerows,rows)
  }
  bootdata <- dat.usable[samplerows,]
  bootfit1 <- lm(mayor_app_comb ~  delay_comb_log * treat_2, data=bootdata)
  bootfit2b <- lm(mayor_app_comb ~  delay_comb_log * treat_2 + delay_line + polinfo + (party==-1) + (party==1), data=bootdata)
  ATEs[i] <- summary(bootfit1)$coefficients["delay_comb_log",1]
  intx[i] <- summary(bootfit1)$coefficients["delay_comb_log:treat_2",1]
  ATEs_cont[i] <- summary(bootfit2b)$coefficients["delay_comb_log",1]
  intx_cont[i] <- summary(bootfit2b)$coefficients["delay_comb_log:treat_2",1]
}
quantile(ATEs,c(0.025,0.975)) # still insig
quantile(intx,c(0.025,0.975)) # still sig 
quantile(ATEs_cont,c(0.025,0.975)) # still insig
quantile(intx_cont,c(0.025,0.975)) # still sig

exp.effect <- fit_exp_logdelay_c$coefficients["delay_comb_log:treat_2"]



### substantive effect:
mean(dat$delay_comb,na.rm=T) # ~2
median(dat$delay_comb,na.rm=T) # 0
(log1p(2)-log1p(0))*fit_exp_logdelay_c$coefficients["delay_comb_log:treat_2"]+(log1p(2)-log1p(0))*fit_exp_logdelay_c$coefficients["delay_comb_log"] # -0.06
# And for a 6.5-minute delay:
(log1p(6.5))*fit_exp_logdelay_c$coefficients["delay_comb_log:treat_2"]+(log1p(6.5))*fit_exp_logdelay_c$coefficients["delay_comb_log"] # -0.12


# ordered logit:
f1e <- (polr(as.factor(mayor_app_comb) ~ delay_comb_log * treat_2,data=dat,method="logistic"));summary(f1e) # sig neg intx (-0.144)
ctable <- coef(summary(f1e))
p <- pnorm(abs(ctable[,"t value"]),lower.tail = F)*2
ctable <- cbind(ctable, "p value"=p) # -0.40 effect, p=0.118 for intx
f1e2 <- (polr(as.factor(mayor_app_comb) ~ delay_comb_log * treat_2+ delay_source + polinfo + (party==-1) + (party==1),data=dat,method="logistic"));summary(f1e2) # sig neg intx (-0.144)
ctable <- coef(summary(f1e2))
p <- pnorm(abs(ctable[,"t value"]),lower.tail = F)*2
(ctable <- cbind(ctable, "p value"=p)) # -0.46 effect, p=0.084 for intx



## Removing outliers:
test_bandwidths <- matrix(nrow=18, ncol=4)
colnames(test_bandwidths) <- c("bandwidth", "coef", "se", "n")
test_bandwidths <- as.data.frame(test_bandwidths)
test_bandwidths$bandwidth <- c(seq(5,85,5),100)

# for(i in 1:length(test_bandwidths$bandwidth)){
#   reg <- lm(mayor_app_comb ~  delay_comb_log * treat_politics + delay_source + polinfo + (party==-1) + (party==1), data=dat[dat$delay_comb<test_bandwidths$bandwidth[i],])
#   test_bandwidths$coef[i] <- coef(summary(reg))[8]
#   test_bandwidths$se[i] <- coef(summary(reg))[8,2]
#   test_bandwidths$n[i] <- nrow(model.frame(reg))
# }

for(i in 1:length(test_bandwidths$bandwidth)){
  reg <- lm(mayor_app_comb ~  delay_comb_log * treat_2 + delay_line + polinfo + (party==-1) + (party==1), data=dat[dat$delay_comb<test_bandwidths$bandwidth[i],])
  test_bandwidths$coef[i] <- coef(summary(reg))[22]
  test_bandwidths$se[i] <- coef(summary(reg))[22,2]
  test_bandwidths$n[i] <- nrow(model.frame(reg))
}


test_bandwidths$color <- "red"
test_bandwidths$color[test_bandwidths$bandwidth==100] <- "blue"



# Effect after outlier removal (Figure D-2):
pdf("figures/test_bandwidths2.pdf",width=10,height=7)
ggplot(data = test_bandwidths[1:17,], aes(x = c(1:17))) +
  geom_ribbon(aes(x = c(1:17), 
                  ymin = coef[1:17] - 1.96*se[1:17], ymax = coef[1:17] + 1.96*se[1:17], height=.1),
              colour = "gray",alpha=0.1) +
  geom_point(aes(y=coef[1:17]),colour = test_bandwidths$color[1:17], size = 3,shape=16) +
  geom_point(aes(x=19,y=test_bandwidths$coef[18]), colour = test_bandwidths$color[18], size = 3) +
  geom_errorbar(aes(x = 19, 
                    ymin = test_bandwidths$coef[18] - 1.96*test_bandwidths$se[18], ymax = test_bandwidths$coef[18] + 1.96*test_bandwidths$se[18], height=.1),
                colour = test_bandwidths$color[18]) +
  xlab("Subset") + ylab("Interaction Coefficient + 95% CI") +
  scale_x_continuous(breaks=c(1,4,7,10,13,16,19),labels=c("Delay < 5\nminutes","<20","<35","<50","<65","80","Full\nSample")) +
  expand_limits(y=c(-0.3,0.2)) +
  geom_hline(yintercept = 0,size=.5,colour="blue",linetype="dotted") +
  theme(axis.text.y = element_text(angle = 0, hjust = 0, color="black")) +  
#   ggtitle(title) +
  theme_bw() +
  theme(axis.title = element_text(size=15),axis.text = element_text(size=15))
dev.off()



### RDD framework (Appendix E):

# March data:

(rdfit_march <- with(march,
                   rdrobust(y = march$mayor_app_comb, x = march$runningvar_comb_min,c = 0)))
rd.export5(list(rdfit_march,rdfit_march))

# plotting RDD, March:
width <- 2
march <- mutate(march, bin=cut(runningvar_comb_min, breaks=seq(-20,20, width)))
bin2.df <- data.frame(bin.mean=tapply(march$mayor_app_comb,
                                      march$bin, mean, na.rm=TRUE),
                      mid=seq(-20 + width/2, 20 - width/2, width),
                      n=tapply(march$mayor_app_comb, march$bin,
                               length))
bin2.df$n[is.na(bin2.df$n)] <- 0
rdplot_march <- (ggplot(march)
                 + geom_point(data=march,
                              aes(x = runningvar_comb_min, y = mayor_app_comb),position=position_jitter(width = 0.01), pch=16, alpha=.3)
                 + geom_smooth(data=subset(march, runningvar_comb_min >= 0),
                               aes(x = runningvar_comb_min, y = mayor_app_comb),
                               method = 'lm', formula = y ~ poly(x, 1), size=1.5, col="black")
                 + geom_smooth(data=subset(march, runningvar_comb_min < 0),
                               aes(x = runningvar_comb_min, y = mayor_app_comb),
                               method = 'lm', formula = y ~ poly(x, 1), size=1.5, col="black")
                 + geom_point(data=bin2.df, aes(x=mid, y=bin.mean, size=n), pch=1)
                 + geom_vline(xintercept=0, lty='dotted')
                 + geom_hline(yintercept=0, lty='dotted')
                 # + coord_cartesian(ylim = c(-0.05,1.05))
                 + scale_y_continuous(breaks=seq(0,1,0.25),labels=c("Strongly\nDisapprove","","","","Strongly\nApprove"),limits=c(-0.05,1.05))
                 + scale_x_continuous(breaks=seq(-10,10,2),
                                      limits=c(-6,6))
                 
                 + labs(size = '# Observations in\n2-unit Bin')
                 + theme_bw()
                 + labs(x = "Minutes passenger arrived after closest train",
                        y = "Local government approval")
                 + theme(axis.text = element_text(size=15),axis.title = element_text(size=15))
)
ggsave(plot = rdplot_march,path = "figures",filename="rdplot_march.pdf",width=10,height=7)

# plotting just within the bandwidth:
width <- 1
march <- mutate(march, bin=cut(runningvar_comb_min, breaks=seq(-20,20, width)))
bin2.df <- data.frame(bin.mean=tapply(march$mayor_app_comb,
                                      march$bin, mean, na.rm=TRUE),
                      mid=seq(-20 + width/2, 20 - width/2, width),
                      n=tapply(march$mayor_app_comb, march$bin,
                               length))
bin2.df$n[is.na(bin2.df$n)] <- 0
rdplot_march_zoom <- (ggplot(march)
                      + geom_point(data=march,
                                   aes(x = runningvar_comb_min, y = mayor_app_comb),position=position_jitter(width = 0.01), pch=16, alpha=.3)
                      + geom_smooth(data=subset(march, runningvar_comb_min >= 0 & runningvar_comb_min<=fit_march$bws[2,2]),
                                    aes(x = runningvar_comb_min, y = mayor_app_comb),
                                    method = 'lm', formula = y ~ poly(x, 1), size=1.5, col="black")
                      + geom_smooth(data=subset(march, runningvar_comb_min < 0 & runningvar_comb_min>=-fit_march$bws[2,2]),
                                    aes(x = runningvar_comb_min, y = mayor_app_comb),
                                    method = 'lm', formula = y ~ poly(x, 1), size=1.5, col="black")
                      + geom_point(data=bin2.df, aes(x=mid, y=bin.mean, size=n), pch=1)
                      + geom_vline(xintercept=0, lty='dotted')
                      + geom_hline(yintercept=0, lty='dotted')
                      # + coord_cartesian(ylim = c(-0.05,1.05))
                      + scale_y_continuous(breaks=seq(0,1,0.25),labels=c("Strongly\nDisapprove","","","","Strongly\nApprove"),limits=c(-0.05,1.05))
                      + scale_x_continuous(breaks=seq(-10,10,1),
                                           limits=c(-3,3))
                      
                      + labs(size = '# Observations in\n2-unit Bin')
                      + theme_bw()
                      + labs(x = "Minutes passenger arrived after closest train",
                             y = "Local government approval")
                      + theme(axis.text = element_text(size=15),axis.title = element_text(size=15))
                      )
ggsave(plot = rdplot_march_zoom,path = "figures",filename="rdplot_march_zoom.pdf",width=10,height=7)



## Summer (experiment) data:
(rdfit0 <- with(subset(dat,treat_2==0),
              rdrobust(mayor_app_comb, runningvar_comb_min))) # beta=0.27, p=0.7005, h=1.07
(rdfit1 <- with(subset(dat,treat_2==1),
              rdrobust(mayor_app_comb, runningvar_comb_min))) # beta=-0.4961, p=0.09, h=1.337
rd.export5(list(rdfit0,rdfit1))

# plotting RD effect:
width <- 2
dat <- mutate(dat, bin=cut(runningvar_comb_min, breaks=seq(-20,20, width)))
bin2.df0 <- data.frame(bin.mean=tapply(dat$mayor_app_comb[dat$treat_2==0],
                                       dat$bin[dat$treat_2==0], mean, na.rm=TRUE),
                       mid=seq(-20 + width/2, 20 - width/2, width),
                       n=tapply(dat$runningvar_comb_min[dat$treat_2==0], dat$bin[dat$treat_2==0],
                                length))
bin2.df1 <- data.frame(bin.mean=tapply(dat$mayor_app_comb[dat$treat_2==1],
                                       dat$bin[dat$treat_2==1], mean, na.rm=TRUE),
                       mid=seq(-20 + width/2, 20 - width/2, width),
                       n=tapply(dat$runningvar_comb_min[dat$treat_2==1], dat$bin[dat$treat_2==1],
                                length))

rdplot_treat2 <- (ggplot(dat)
                  + geom_point(data=subset(dat,treat_2 == 0),
                               aes(x = runningvar_comb_min, y = mayor_app_comb),position=position_jitter(width = 0.01), pch=16, alpha=.3)
                  + geom_point(data=subset(dat,treat_2 == 1),
                               aes(x = runningvar_comb_min, y = mayor_app_comb),position=position_jitter(width = 0.01), pch=16, alpha=.3,col="cornflowerblue")
                  + geom_smooth(data=subset(dat, runningvar_comb_min >= 0 & treat_2 == 0),
                                aes(x = runningvar_comb_min, y = mayor_app_comb),
                                method = 'lm', formula = y ~ poly(x, 1), size=1.5, col="black")
                  + geom_smooth(data=subset(dat, runningvar_comb_min >= 0 & treat_2 == 1),
                                aes(x = runningvar_comb_min, y = mayor_app_comb),
                                method = 'lm', formula = y ~ poly(x, 1), size=1.5, col="cornflowerblue")
                  + geom_smooth(data=subset(dat, runningvar_comb_min < 0 & treat_2 == 0),
                                aes(x = runningvar_comb_min, y = mayor_app_comb),
                                method = 'lm', formula = y ~ poly(x, 1), size=1.5, col="black")
                  + geom_smooth(data=subset(dat, runningvar_comb_min < 0 & treat_2 == 1),
                                aes(x = runningvar_comb_min, y = mayor_app_comb),
                                method = 'lm', formula = y ~ poly(x, 1), size=1.5, col="cornflowerblue")
                  + geom_point(data=bin2.df0, aes(x=mid, y=bin.mean, size=n), pch=1)
                  + geom_point(data=bin2.df1, aes(x=mid, y=bin.mean, size=n), pch=1,col="cornflowerblue")
                  + geom_vline(xintercept=0, lty='dotted')
                  + geom_hline(yintercept=0, lty='dotted')
                  + scale_y_continuous(breaks=seq(0,1,0.25),labels=c("Strongly\nDisapprove","","","","Strongly\nApprove"),limits=c(-0.05,1.05))
                  + scale_x_continuous(breaks=seq(-10,10,2),
                                       limits=c(-6,6))
                  
                  + labs(size = '# Obs. in\n2-unit Bin')
                  + theme_bw()
                  + labs(x = "Minutes passenger arrived after closest train",
                         y = "Local government approval")
                  + annotate('text', x = 1.0, y = 0.85, 0.05, hjust=0, parse=TRUE, col="black",
                             label = paste('hat(tau)==',
                                           round(fit0$coef['Conventional', ], 2),
                                           '~(list(', round(fit0$ci['Robust', 1], 2),
                                           ',', round(fit0$ci['Robust', 2], 2),
                                           '))'))
                  + annotate('text', x = 1.0, y = 0.40, 0.05, hjust=0, parse=TRUE, col="cornflowerblue",
                             label = paste('hat(tau)==',
                                           round(fit1$coef['Conventional', ], 2),
                                           '~(list(', round(fit1$ci['Robust', 1], 2),
                                           ',', round(fit1$ci['Robust', 2], 2),
                                           '))'))
                  + annotate('text', x = 1.0, y = 0.89, 0.05, hjust=0, parse=F,
                             label = "Control:",col="black")
                  + annotate('text', x = 1.0, y = 0.44, 0.05, hjust=0, parse=F,
                             label = "Treatment:",col="cornflowerblue")
                  
)
ggsave(plot = rdplot_treat2,path = "figures",filename="rdplot_treat2.pdf",width=10,height=7)


# with smaller window/line bws:
width <- 1
dat <- mutate(dat, bin=cut(runningvar_comb_min, breaks=seq(-20,20, width)))
bin2.df0 <- data.frame(bin.mean=tapply(dat$mayor_app_comb[dat$treat_2==0],
                                       dat$bin[dat$treat_2==0], mean, na.rm=TRUE),
                       mid=seq(-20 + width/2, 20 - width/2, width),
                       n=tapply(dat$runningvar_comb_min[dat$treat_2==0], dat$bin[dat$treat_2==0],
                                length))
bin2.df1 <- data.frame(bin.mean=tapply(dat$mayor_app_comb[dat$treat_2==1],
                                       dat$bin[dat$treat_2==1], mean, na.rm=TRUE),
                       mid=seq(-20 + width/2, 20 - width/2, width),
                       n=tapply(dat$runningvar_comb_min[dat$treat_2==1], dat$bin[dat$treat_2==1],
                                length))


rdplot_treat2_zoom <- (ggplot(dat)
                       + geom_point(data=subset(dat,treat_2 == 0),
                                    aes(x = runningvar_comb_min, y = mayor_app_comb),position=position_jitter(width = 0.01), pch=16, alpha=.3)
                       + geom_point(data=subset(dat,treat_2 == 1),
                                    aes(x = runningvar_comb_min, y = mayor_app_comb),position=position_jitter(width = 0.01), pch=16, alpha=.3,col="cornflowerblue")
                       + geom_smooth(data=subset(dat, runningvar_comb_min >= 0 & runningvar_comb_min<=fit0$bws[2,2] & treat_2 == 0),
                                     aes(x = runningvar_comb_min, y = mayor_app_comb),
                                     method = 'lm', formula = y ~ poly(x, 1), size=1.5, col="black")
                       + geom_smooth(data=subset(dat, runningvar_comb_min >= 0 & runningvar_comb_min<=fit1$bws[2,2] & treat_2 == 1),
                                     aes(x = runningvar_comb_min, y = mayor_app_comb),
                                     method = 'lm', formula = y ~ poly(x, 1), size=1.5, col="cornflowerblue")
                       + geom_smooth(data=subset(dat, runningvar_comb_min < 0 & runningvar_comb_min>=-fit0$bws[2,2] & treat_2 == 0),
                                     aes(x = runningvar_comb_min, y = mayor_app_comb),
                                     method = 'lm', formula = y ~ poly(x, 1), size=1.5, col="black")
                       + geom_smooth(data=subset(dat, runningvar_comb_min < 0 & runningvar_comb_min>=-fit1$bws[2,2] & treat_2 == 1),
                                     aes(x = runningvar_comb_min, y = mayor_app_comb),
                                     method = 'lm', formula = y ~ poly(x, 1), size=1.5, col="cornflowerblue")
                       + geom_point(data=bin2.df0, aes(x=mid, y=bin.mean, size=n), pch=1)
                       + geom_point(data=bin2.df1, aes(x=mid, y=bin.mean, size=n), pch=1,col="cornflowerblue")
                       + geom_vline(xintercept=0, lty='dotted')
                       + geom_hline(yintercept=0, lty='dotted')
                       + scale_y_continuous(breaks=seq(0,1,0.25),labels=c("Strongly\nDisapprove","","","","Strongly\nApprove"),limits=c(-0.05,1.05))
                       + scale_x_continuous(breaks=seq(-10,10,1),
                                            limits=c(-3,3))
                       
                       + labs(size = '# Obs. in\n2-unit Bin')
                       + theme_bw()
                       + labs(x = "Minutes passenger arrived after closest train",
                              y = "Local government approval")
                       + annotate('text', x = 1.0, y = 0.85, 0.05, hjust=0, parse=TRUE, col="black",
                                  label = paste('hat(tau)==',
                                                round(fit0$coef['Conventional', ], 2),
                                                '~(list(', round(fit0$ci['Robust', 1], 2),
                                                ',', round(fit0$ci['Robust', 2], 2),
                                                '))'))
                       + annotate('text', x = 0.75, y = 0.38, 0.05, hjust=0, parse=TRUE, col="cornflowerblue",
                                  label = paste('hat(tau)==',
                                                round(fit1$coef['Conventional', ], 2),
                                                '~(list(', round(fit1$ci['Robust', 1], 2),
                                                ',', round(fit1$ci['Robust', 2], 2),
                                                '))'))
                       + annotate('text', x = 1.0, y = 0.89, 0.05, hjust=0, parse=F,
                                  label = "Control:",col="black")
                       + annotate('text', x = 0.75, y = 0.42, 0.05, hjust=0, parse=F,
                                  label = "Treatment:",col="cornflowerblue")
                       
)
ggsave(plot = rdplot_treat2_zoom,path = "figures",filename="rdplot_treat2_zoom.pdf",width=10,height=7)


rdplot_controlonly_zoom <- (ggplot(dat)
                          + geom_point(data=subset(dat,treat_2 == 0),
                                       aes(x = runningvar_comb_min, y = mayor_app_comb),
                                       position=position_jitter(width = 0.01), pch=2, size=2)
                          + geom_smooth(data=subset(dat, runningvar_comb_min >= 0 & runningvar_comb_min<=fit0$bws[2,2] & treat_2 == 0),
                                        aes(x = runningvar_comb_min, y = mayor_app_comb),
                                        method = 'lm', formula = y ~ poly(x, 1), size=2, col="black",lty=2)
                          + geom_smooth(data=subset(dat, runningvar_comb_min < 0 & runningvar_comb_min>=-fit0$bws[2,2] & treat_2 == 0),
                                        aes(x = runningvar_comb_min, y = mayor_app_comb),
                                        method = 'lm', formula = y ~ poly(x, 1), size=2, col="black",lty=2)
                          + geom_point(data=bin2.df0, aes(x=mid, y=bin.mean, size=n), pch=1)
                          + geom_vline(xintercept=0, lty='dotted')
                          + geom_hline(yintercept=0, lty='dotted')
                          + scale_y_continuous(breaks=seq(0,1,0.25),labels=c("Strongly\nDisapprove","","","","Strongly\nApprove"),limits=c(-0.05,1.05))
                          + scale_x_continuous(breaks=seq(-10,10,1),
                                               limits=c(-3,3))
                          + labs(size = '# Obs. in\n2-unit Bin')
                          + theme_bw()
                          + labs(x = "Minutes passenger arrived after closest train",
                                 y = "Local government approval")
                          + annotate('text', x = 0.8, y = 0.3, 0.05, hjust=0, parse=TRUE, col="black",size=5,
                                     label = paste('hat(tau)==',
                                                   round(rdfit0$coef['Conventional', ], 2),
                                                   '~(list(', round(rdfit0$ci['Robust', 1], 2),
                                                   ',', round(rdfit0$ci['Robust', 2], 2),
                                                   '))'))
                          + theme(axis.text = element_text(size=15),axis.title = element_text(size=15))
)
ggsave(plot = rdplot_controlonly_zoom,path = "figures",filename="rdplot_controlonly_zoom.pdf",width=10,height=7)


rdplot_treatonly_zoom <- (ggplot(dat)
                       + geom_point(data=subset(dat,treat_2 == 1),
                                    aes(x = runningvar_comb_min, y = mayor_app_comb),position=position_jitter(width = 0.01), pch=16,col="cornflowerblue",size=2)
                       + geom_smooth(data=subset(dat, runningvar_comb_min >= 0 & runningvar_comb_min<=fit1$bws[2,2] & treat_2 == 1),
                                     aes(x = runningvar_comb_min, y = mayor_app_comb),
                                     method = 'lm', formula = y ~ poly(x, 1), size=2, col="cornflowerblue")
                       + geom_smooth(data=subset(dat, runningvar_comb_min < 0 & runningvar_comb_min>=-fit1$bws[2,2] & treat_2 == 1),
                                     aes(x = runningvar_comb_min, y = mayor_app_comb),
                                     method = 'lm', formula = y ~ poly(x, 1), size=2, col="cornflowerblue")
                       + geom_point(data=bin2.df1, aes(x=mid, y=bin.mean, size=n), pch=1,col="cornflowerblue")
                       + geom_vline(xintercept=0, lty='dotted')
                       + geom_hline(yintercept=0, lty='dotted')
                       + scale_y_continuous(breaks=seq(0,1,0.25),labels=c("Strongly\nDisapprove","","","","Strongly\nApprove"),limits=c(-0.05,1.05))
                       + scale_x_continuous(breaks=seq(-10,10,1),
                                            limits=c(-3,3))
                       
                       + labs(size = '# Obs. in\n2-unit Bin')
                       + theme_bw()
                       + labs(x = "Minutes passenger arrived after closest train",
                              y = "Local government approval")
                       + annotate('text', x = 0.75, y = 0.36, 0.05, hjust=0, parse=TRUE, col="cornflowerblue",size=5,
                                  label = paste('hat(tau)==',
                                                round(rdfit1$coef['Conventional', ], 2),
                                                '~(list(', round(rdfit1$ci['Robust', 1], 2),
                                                ',', round(rdfit1$ci['Robust', 2], 2),
                                                '))'))
                       + theme(axis.text = element_text(size=15),axis.title = element_text(size=15))
)
ggsave(plot = rdplot_treatonly_zoom,path = "figures",filename="rdplot_treatonly_zoom.pdf",width=10,height=7)
